In [1]:
from IPython.core.interactiveshell import InteractiveShell
from IPython.display import display
import pandas as pd
import numpy as np
import json
import re
import matplotlib.pyplot as plt
import seaborn as sns
import spacy
import nltk
from wordcloud import WordCloud
import cv2
import os
In [2]:
# Sets the IPython options: with "all", the value of every top-level
# expression of a cell is displayed, not only the last one

InteractiveShell.ast_node_interactivity = "all"
In [3]:
# Design

# Sizes (font sizes reused by the plotting cells of this notebook)

title_size = 50
tick_size = 30
text_size = 30

# Colors

# Displays the 'PuOr' palette, from which the plots pick their colors
sns.color_palette('PuOr')
Out[3]:
In [4]:
# Functions


def return_missing_values_df_bar(df, bars=True):

    """Returns the percentage of missing values of each column of df.

    Only the columns holding at least one missing value are kept.

    Parameters
    ----------
    df : pandas.DataFrame
        The dataframe to inspect.
    bars : bool, default True
        If True, the result is returned through ``.style.bar`` (in-cell bars
        colored with the fifth color of the seaborn 'PuOr' palette).
        If False, the plain one-column dataframe is returned.

    Returns
    -------
    A one-column ('Missing Values') dataframe indexed by column name,
    styled or not according to ``bars``.
    """

    # Percentage of non-null values subtracted from 100
    missing_ratio = 100 - (df.count() / df.shape[0]) * 100

    missing_table = pd.DataFrame(missing_ratio, columns=['Missing Values'])
    has_missing = missing_table['Missing Values'] > 0
    missing_table = missing_table[has_missing]

    if not bars:
        return missing_table

    bar_color = sns.color_palette('PuOr').as_hex()[4]

    return missing_table.style.bar(color=bar_color)


def retrieve_image(image):

    """Opens the file named `image` from the hard-coded Flipkart images
    folder and returns the result of Image.open"""

    # NOTE(review): `Image` is not imported in the notebook's visible import
    # cell; presumably `from PIL import Image` is expected - confirm, as
    # calling this function raises a NameError otherwise.
    image_path = r'F:\Data\Projet 6\Flipkart\Images\{}'.format(image)

    return Image.open(image_path)


def create_category_node(value, node):

    """Extracts one level of a product_category_tree string.

    Parameters
    ----------
    value : str
        Raw category tree whose levels are separated by '>>', e.g.
        '["Watches >> Wrist Watches >> Maserati Time Wrist Watches"]'.
    node : int
        1-based position of the level to extract.

    Returns
    -------
    str or float
        The level name, stripped of surrounding spaces, square brackets and
        double quotes, or np.nan when the tree has no such level
        (node greater than the tree depth, or node lower than 1).
    """

    node_index = node - 1
    node_list = re.split('>>', value)

    # A node lower than 1 would otherwise be used as a negative index and
    # silently return a level counted from the end of the tree
    if node_index < 0 or node_index >= len(node_list):
        return np.nan

    # Removes brackets and "" (they surround the first and last nodes)
    return re.sub(r'[\"\[\]]', '', node_list[node_index]).strip()


def add_unique_id_to_product_specifications_dict(row):

    """Inserts the row's uniq_id into its formatted specifications string.

    `row` is read by key ('uniq_id' and 'product_specifications_formatted').
    Every `"}` sequence of the specifications string is rewritten as
    `", "uniq_id": "<uniq_id>"}`; a string holding no such sequence is
    returned unchanged (no uniq_id is added to it).
    """

    # Closing sequence carrying the id ('}}' is a literal '}' for format)
    closing_with_id = '", "uniq_id": "{}"}}'.format(row['uniq_id'])

    return re.sub('"}',
                  closing_with_id,
                  row['product_specifications_formatted'])


def correct_product_specifications_json_conflicts(value):

    """Rebuilds a formatted specifications string so that it can be parsed
    as a json object.

    The string is split on `",`. Pieces holding a `": "` key/value separator
    are kept as regular pairs. The other pieces lose their double quotes and
    are concatenated (each followed by a comma) into the value of an extra
    "Other" key appended at the end; this value is empty when every piece
    is a regular pair.

    When the rebuilt string does not start with `{` (e.g. because the first
    piece was not a key/value pair), a `{` is prepended and the `,,`, `"",`
    and leading `{, ` artifacts are cleaned up.
    """

    pieces = re.split('",', value)

    # General Cases

    # A piece is json compatible when it holds a key/value separator
    compatible_flags = [bool(re.search('": "', piece)) for piece in pieces]

    # Regular pairs, each one followed by the separator removed by the split
    kept_pairs = ''.join(piece + '",'
                         for piece, is_pair in zip(pieces, compatible_flags)
                         if is_pair)

    # Drops the closing sequence carried by the last piece of `value`
    kept_pairs = re.sub('}",', '', kept_pairs)

    # Everything else goes, unquoted, into the "Other" value
    leftovers = ''.join(piece + ','
                        for piece, is_pair in zip(pieces, compatible_flags)
                        if not is_pair)
    leftovers = re.sub('"', '', leftovers)

    rebuilt = kept_pairs + ', "Other": "' + leftovers + '"}'

    if re.search('^{', rebuilt):
        return rebuilt

    # Particular cases (38 samples concerned, per the original author's note)

    rebuilt = '{' + rebuilt

    for pattern, replacement in ((',,', ','),
                                 ('"",', '",'),
                                 ('^{, ', '{')):
        rebuilt = re.sub(pattern, replacement, rebuilt)

    return rebuilt

    
# NLP Functions


def tokenize_doc_no_punct(document):

    """Returns the lower-cased tokens of document.

    The document is tokenized with the make_doc method of the global `nlp`
    object, which therefore has to be defined before the first call.
    Punctuation and whitespace tokens are dropped."""

    tokens = []

    for token in nlp.make_doc(document):
        if token.is_punct or token.is_space:
            continue
        tokens.append(token.lower_)

    return tokens


def tokenize_doc_no_punct_no_stop_words(document, stop_word_list):

    """Returns the lower-cased tokens of document, stop words excluded.

    The document is tokenized with the make_doc method of the global `nlp`
    object, which therefore has to be defined before the first call.
    Punctuation and whitespace tokens are dropped, as well as every token
    whose lower-cased text is found in stop_word_list."""

    kept_tokens = []

    for token in nlp.make_doc(document):
        if token.is_punct or token.is_space:
            continue

        lowered = token.lower_

        # The stop word list is customized by the caller: token.is_stop is
        # deliberately not used (spaCy bug reported by the original author)
        if lowered not in stop_word_list:
            kept_tokens.append(lowered)

    return kept_tokens
In [5]:
# Data folder

# Defaults to the original local folder; set the FLIPKART_DATA_DIR
# environment variable to run the notebook on another machine without
# editing the code

data_dir = os.environ.get('FLIPKART_DATA_DIR', r'F:\Data\Projet 6\Flipkart')

# Imports the dataset

flipkart_df = \
    pd.read_csv(
        os.path.join(data_dir, 'flipkart_com-ecommerce_sample_1050.csv'))

# Forks a copy of the dataframe, the raw dataframe is kept untouched

flipkart_fork_1 = flipkart_df.copy()

# Random Seed

random_seed = 34

# Path for images
# (the empty last component makes the path end with a path separator,
# so that a file name can still be appended to it directly)

path = os.path.join(data_dir, 'Images', '')

EDA

Global

In [6]:
flipkart_fork_1.head()
Out[6]:
uniq_id crawl_timestamp product_url product_name product_category_tree pid retail_price discounted_price image is_FK_Advantage_product description product_rating overall_rating brand product_specifications
0 55b85ea15a1536d46b7190ad6fff8ce7 2016-04-30 03:22:56 +0000 http://www.flipkart.com/elegance-polyester-mul... Elegance Polyester Multicolor Abstract Eyelet ... ["Home Furnishing >> Curtains & Accessories >>... CRNEG7BKMFFYHQ8Z 1899.0 899.0 55b85ea15a1536d46b7190ad6fff8ce7.jpg False Key Features of Elegance Polyester Multicolor ... No rating available No rating available Elegance {"product_specification"=>[{"key"=>"Brand", "v...
1 7b72c92c2f6c40268628ec5f14c6d590 2016-04-30 03:22:56 +0000 http://www.flipkart.com/sathiyas-cotton-bath-t... Sathiyas Cotton Bath Towel ["Baby Care >> Baby Bath & Skin >> Baby Bath T... BTWEGFZHGBXPHZUH 600.0 449.0 7b72c92c2f6c40268628ec5f14c6d590.jpg False Specifications of Sathiyas Cotton Bath Towel (... No rating available No rating available Sathiyas {"product_specification"=>[{"key"=>"Machine Wa...
2 64d5d4a258243731dc7bbb1eef49ad74 2016-04-30 03:22:56 +0000 http://www.flipkart.com/eurospa-cotton-terry-f... Eurospa Cotton Terry Face Towel Set ["Baby Care >> Baby Bath & Skin >> Baby Bath T... BTWEG6SHXTDB2A2Y NaN NaN 64d5d4a258243731dc7bbb1eef49ad74.jpg False Key Features of Eurospa Cotton Terry Face Towe... No rating available No rating available Eurospa {"product_specification"=>[{"key"=>"Material",...
3 d4684dcdc759dd9cdf41504698d737d8 2016-06-20 08:49:52 +0000 http://www.flipkart.com/santosh-royal-fashion-... SANTOSH ROYAL FASHION Cotton Printed King size... ["Home Furnishing >> Bed Linen >> Bedsheets >>... BDSEJT9UQWHDUBH4 2699.0 1299.0 d4684dcdc759dd9cdf41504698d737d8.jpg False Key Features of SANTOSH ROYAL FASHION Cotton P... No rating available No rating available SANTOSH ROYAL FASHION {"product_specification"=>[{"key"=>"Brand", "v...
4 6325b6870c54cd47be6ebfbffa620ec7 2016-06-20 08:49:52 +0000 http://www.flipkart.com/jaipur-print-cotton-fl... Jaipur Print Cotton Floral King sized Double B... ["Home Furnishing >> Bed Linen >> Bedsheets >>... BDSEJTHNGWVGWWQU 2599.0 698.0 6325b6870c54cd47be6ebfbffa620ec7.jpg False Key Features of Jaipur Print Cotton Floral Kin... No rating available No rating available Jaipur Print {"product_specification"=>[{"key"=>"Machine Wa...
In [7]:
flipkart_fork_1.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1050 entries, 0 to 1049
Data columns (total 15 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   uniq_id                  1050 non-null   object 
 1   crawl_timestamp          1050 non-null   object 
 2   product_url              1050 non-null   object 
 3   product_name             1050 non-null   object 
 4   product_category_tree    1050 non-null   object 
 5   pid                      1050 non-null   object 
 6   retail_price             1049 non-null   float64
 7   discounted_price         1049 non-null   float64
 8   image                    1050 non-null   object 
 9   is_FK_Advantage_product  1050 non-null   bool   
 10  description              1050 non-null   object 
 11  product_rating           1050 non-null   object 
 12  overall_rating           1050 non-null   object 
 13  brand                    712 non-null    object 
 14  product_specifications   1049 non-null   object 
dtypes: bool(1), float64(2), object(12)
memory usage: 116.0+ KB
In [8]:
flipkart_fork_1.shape
Out[8]:
(1050, 15)
In [9]:
flipkart_fork_1.nunique()
Out[9]:
uniq_id                    1050
crawl_timestamp             149
product_url                1050
product_name               1050
product_category_tree       642
pid                        1050
retail_price                354
discounted_price            424
image                      1050
is_FK_Advantage_product       2
description                1050
product_rating               27
overall_rating               27
brand                       490
product_specifications      984
dtype: int64
In [10]:
# Displays Missing Values

return_missing_values_df_bar(flipkart_fork_1)
Out[10]:
  Missing Values
retail_price 0.095238
discounted_price 0.095238
brand 32.190476
product_specifications 0.095238

Numerical

In [11]:
flipkart_fork_1.describe()
Out[11]:
retail_price discounted_price
count 1049.000000 1049.000000
mean 2186.197331 1584.527169
std 7639.229411 7475.099680
min 35.000000 35.000000
25% 555.000000 340.000000
50% 999.000000 600.000000
75% 1999.000000 1199.000000
max 201000.000000 201000.000000
In [12]:
# Plots the distribution of every float feature (one figure per feature)

hist_color = sns.color_palette('PuOr')[5]
float_columns = flipkart_fork_1.select_dtypes('float').columns

for i in float_columns:

    feature_label = '{}'.format(i)

    fig, ax = plt.subplots(figsize=(20, 10))
    sns.histplot(data=flipkart_fork_1, x=i, kde=True,
                 color=hist_color, ax=ax)
    ax.tick_params(labelsize=tick_size)
    ax.set_xlabel(feature_label, fontsize=text_size)
    ax.set_ylabel('Count', fontsize=text_size)
    ax.set_title(feature_label, fontsize=title_size, pad=20)

plt.show();

Categorical

Tabularisation of some categorical data

In [13]:
# Category tree

# Displays an example of the structure of the category tree
# (the nodes of a tree are separated by '>>')

flipkart_fork_1['product_category_tree'].tolist()[5]

# Displays how many trees have each depth
# (depth = number of '>>'-separated nodes of the tree)

flipkart_fork_1['product_category_tree']\
    .apply(lambda x: len(re.split('>>', x)))\
    .value_counts()
Out[13]:
'["Watches >> Wrist Watches >> Maserati Time Wrist Watches"]'
Out[13]:
3    368
5    278
4    274
6     70
7     57
2      3
Name: product_category_tree, dtype: int64
In [14]:
# Creates the first three category nodes (one feature per tree level)

category_node_columns = ['product_category_node_{}'.format(level)
                         for level in (1, 2, 3)]

for level, node_column in enumerate(category_node_columns, start=1):
    flipkart_fork_1[node_column] = \
        flipkart_fork_1['product_category_tree']\
        .apply(lambda tree: create_category_node(tree, level))

# Displays the head of created features

flipkart_fork_1[category_node_columns].head()
Out[14]:
product_category_node_1 product_category_node_2 product_category_node_3
0 Home Furnishing Curtains & Accessories Curtains
1 Baby Care Baby Bath & Skin Baby Bath Towels
2 Baby Care Baby Bath & Skin Baby Bath Towels
3 Home Furnishing Bed Linen Bedsheets
4 Home Furnishing Bed Linen Bedsheets
In [15]:
# Product Specification

# Displays an example of the structure of the product specification

flipkart_fork_1['product_specifications'].tolist()[0]
Out[15]:
'{"product_specification"=>[{"key"=>"Brand", "value"=>"Elegance"}, {"key"=>"Designed For", "value"=>"Door"}, {"key"=>"Type", "value"=>"Eyelet"}, {"key"=>"Model Name", "value"=>"Abstract Polyester Door Curtain Set Of 2"}, {"key"=>"Model ID", "value"=>"Duster25"}, {"key"=>"Color", "value"=>"Multicolor"}, {"key"=>"Length", "value"=>"213 cm"}, {"key"=>"Number of Contents in Sales Package", "value"=>"Pack of 2"}, {"key"=>"Sales Package", "value"=>"2 Curtains"}, {"key"=>"Material", "value"=>"Polyester"}]}'
In [16]:
# Removes principal redundancies: the "product_specification", "key" and
# "value" labels as well as the => arrows
# (str() turns a missing specification into the 'nan' string)

redundancy_pattern = \
    re.compile('(\"product_specification\")|(\"key\")|(\"value\")|(=>)')

flipkart_fork_1['product_specifications_formatted'] = \
    flipkart_fork_1['product_specifications']\
    .apply(lambda spec: redundancy_pattern.sub('', str(spec)))

flipkart_fork_1['product_specifications_formatted'].iloc[0]
Out[16]:
'{[{"Brand", "Elegance"}, {"Designed For", "Door"}, {"Type", "Eyelet"}, {"Model Name", "Abstract Polyester Door Curtain Set Of 2"}, {"Model ID", "Duster25"}, {"Color", "Multicolor"}, {"Length", "213 cm"}, {"Number of Contents in Sales Package", "Pack of 2"}, {"Sales Package", "2 Curtains"}, {"Material", "Polyester"}]}'
In [17]:
# Turns every ", sequence into ": to prepare the dict style format
# (each key is then followed by a colon instead of a comma)

flipkart_fork_1['product_specifications_formatted'] = \
    flipkart_fork_1['product_specifications_formatted']\
    .map(lambda spec: re.sub('",', '":', spec))

flipkart_fork_1['product_specifications_formatted'].iloc[0]
Out[17]:
'{[{"Brand": "Elegance"}, {"Designed For": "Door"}, {"Type": "Eyelet"}, {"Model Name": "Abstract Polyester Door Curtain Set Of 2"}, {"Model ID": "Duster25"}, {"Color": "Multicolor"}, {"Length": "213 cm"}, {"Number of Contents in Sales Package": "Pack of 2"}, {"Sales Package": "2 Curtains"}, {"Material": "Polyester"}]}'
In [18]:
# Removes every curly brace: all the key/value pairs end up at the same
# level, between the remaining square brackets

curly_brace_pattern = re.compile('({)|(})')

flipkart_fork_1['product_specifications_formatted'] =\
    flipkart_fork_1['product_specifications_formatted']\
    .map(lambda spec: curly_brace_pattern.sub('', spec))

flipkart_fork_1['product_specifications_formatted'].iloc[0]
Out[18]:
'["Brand": "Elegance", "Designed For": "Door", "Type": "Eyelet", "Model Name": "Abstract Polyester Door Curtain Set Of 2", "Model ID": "Duster25", "Color": "Multicolor", "Length": "213 cm", "Number of Contents in Sales Package": "Pack of 2", "Sales Package": "2 Curtains", "Material": "Polyester"]'
In [19]:
# Replaces the square brackets with curly braces
# ([ becomes { first, then ] becomes })

flipkart_fork_1['product_specifications_formatted'] = \
    flipkart_fork_1['product_specifications_formatted']\
    .map(lambda spec: re.sub(r'\]', '}', re.sub(r'\[', '{', spec)))

flipkart_fork_1['product_specifications_formatted'].iloc[0]
Out[19]:
'{"Brand": "Elegance", "Designed For": "Door", "Type": "Eyelet", "Model Name": "Abstract Polyester Door Curtain Set Of 2", "Model ID": "Duster25", "Color": "Multicolor", "Length": "213 cm", "Number of Contents in Sales Package": "Pack of 2", "Sales Package": "2 Curtains", "Material": "Polyester"}'
In [20]:
# Adds unique id in product_specifications_formatted
# (row-wise, as both the specifications and the uniq_id are needed)

specifications_with_id = \
    flipkart_fork_1[['product_specifications_formatted', 'uniq_id']]

flipkart_fork_1['product_specifications_formatted'] = \
    specifications_with_id\
    .apply(add_unique_id_to_product_specifications_dict, axis=1)

flipkart_fork_1['product_specifications_formatted'].iloc[0]
Out[20]:
'{"Brand": "Elegance", "Designed For": "Door", "Type": "Eyelet", "Model Name": "Abstract Polyester Door Curtain Set Of 2", "Model ID": "Duster25", "Color": "Multicolor", "Length": "213 cm", "Number of Contents in Sales Package": "Pack of 2", "Sales Package": "2 Curtains", "Material": "Polyester", "uniq_id": "55b85ea15a1536d46b7190ad6fff8ce7"}'
In [21]:
# Reformats for json compatibility
# (pieces that are not key/value pairs are moved into an "Other" key)

flipkart_fork_1['product_specifications_formatted'] = \
    flipkart_fork_1['product_specifications_formatted']\
    .map(correct_product_specifications_json_conflicts)

flipkart_fork_1['product_specifications_formatted'].iloc[4]
Out[21]:
'{"Machine Washable": "Yes", "Brand": "Jaipur Print", "Type": "Flat", "Model Name": "jaipur117", "Material": "Cotton", "Thread Count": "140", "Model ID": "jaipur117", "Fabric Care": "machinewash, do not bleach", "Size": "King", "Color": "White", "Flat Sheet Width": "86 inch / 220 cm", "Fitted Sheet Width": "0 cm", "Pillow Cover Width": "17 inch / 45 cm", "Pillow Cover Length": "29 inch / 75 cm", "Weight": "900 g", "Fitted Sheet Depth": "0 cm", "Fitted Sheet Length": "0 cm", "Flat Sheet Depth": "0.2 cm", "Flat Sheet Length": "104 inch / 265 cm", "Number of Contents in Sales Package": "1", "Sales Package": "1 bed sheet 2 pillow cover", "uniq_id": "6325b6870c54cd47be6ebfbffa620ec7", "Other": " waranty of the product only for manufacturing defect only and product will exchange onle when it is not used and returne its origional packing,"}'
In [22]:
# Creates a product_specifications table:
# each formatted string is parsed as a json object, then the parsed dicts
# become the rows of a dataframe (one column per specification key)

product_specifications_list = \
    flipkart_fork_1['product_specifications_formatted'].tolist()

json_list = [json.loads(specification)
             for specification in product_specifications_list]

product_specifications_df = pd.DataFrame(json_list)

product_specifications_df.head()
Out[22]:
Brand Designed For Type Model Name Model ID Color Length Number of Contents in Sales Package Sales Package Material ... Error Check Memory Configuration Memory Clock Technology Re-usable Area of Use Other Traits Acid Free Transfer Paper Printed Text
0 Elegance Door Eyelet Abstract Polyester Door Curtain Set Of 2 Duster25 Multicolor 213 cm Pack of 2 2 Curtains Polyester ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
1 Sathiyas NaN Bath Towel Sathiyas cotton bath towel asvtwl322 Red, Yellow, Blue 30 inch 3 3 Bath Towel Cotton ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
2 Eurospa NaN Face Towel Set SHUVAM20PCFTSETASSORTED SHUVAM20PCFTSETASSORTED Assorted 9 inch 20 20 PIECE FACE TOWEL SET Cotton Terry ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
3 SANTOSH ROYAL FASHION NaN Flat Gold Design Royal Cotton Printed Wedding and G... goldbedi-38 Multicolor NaN 1 1 Bedsheet,2 Pillow Cover Cotton ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
4 Jaipur Print NaN Flat jaipur117 jaipur117 White NaN 1 1 bed sheet 2 pillow cover Cotton ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

5 rows × 609 columns

In [23]:
# Creates a series with the percentage of missing values of each column of
# product_specifications_df

product_specifications_na_serie = \
    product_specifications_df.isna().sum() \
    / product_specifications_df.shape[0] * 100

# Displays missing values of the columns having less than 80% of
# missing values

mostly_filled_columns = \
    product_specifications_na_serie[
        product_specifications_na_serie < 80].index

return_missing_values_df_bar(
    product_specifications_df[mostly_filled_columns])
Out[23]:
  Missing Values
Brand 46.095238
Type 30.666667
Model Name 67.238095
Color 52.095238
Number of Contents in Sales Package 71.714286
Sales Package 51.333333
Material 60.952381
uniq_id 1.047619
Ideal For 55.428571
Width 63.619048
Weight 68.952381
Style Code 79.333333
Height 72.000000
Model Number 79.619048
In [24]:
# Retrieves the uniq_id of the products whose brand is missing
# in flipkart_fork_1

brand_is_missing = flipkart_fork_1['brand'].isna()

missing_brand_uniq_id = \
    flipkart_fork_1.loc[brand_is_missing, 'uniq_id'].values

# Checks if the brand appears in product_specifications_df
# No brands are imputable

concerned_specifications = \
    product_specifications_df['uniq_id'].isin(missing_brand_uniq_id)

product_specifications_df.loc[concerned_specifications, 'Brand'].dropna()
Out[24]:
Series([], Name: Brand, dtype: object)

Categorical Data Stats

In [25]:
# Plots the countplots of product_category_node_1 and
# product_category_node_2 (the latter with smaller tick labels)

for node_column, label_size in (('product_category_node_1', tick_size),
                                ('product_category_node_2', tick_size - 20)):

    fig, ax = plt.subplots(figsize=(20, 10))
    sns.countplot(data=flipkart_fork_1,
                  x=node_column,
                  palette=sns.color_palette('PuOr', 7),
                  ax=ax)

    ax.tick_params(labelsize=label_size)
    ax.tick_params(axis='x', rotation=90)
    ax.set_xlabel(node_column, fontsize=text_size)
    ax.set_ylabel('Count', fontsize=text_size)

    plt.title('{}\'s countplot'.format(node_column),
              fontsize=title_size,
              pad=30)
    plt.show();
In [26]:
# Double check

flipkart_fork_1['product_category_node_1'].value_counts()
Out[26]:
Home Furnishing               150
Baby Care                     150
Watches                       150
Home Decor & Festive Needs    150
Kitchen & Dining              150
Beauty and Personal Care      150
Computers                     150
Name: product_category_node_1, dtype: int64
In [47]:
# Defines the dictionary mapping each category node 1 to its numerical idx
# (the idx is the position of the category in the list below)

category_node_1_names = [
    'Home Furnishing',
    'Baby Care',
    'Watches',
    'Home Decor & Festive Needs',
    'Kitchen & Dining',
    'Beauty and Personal Care',
    'Computers']

product_category_node_1_dict = {
    category: idx for idx, category in enumerate(category_node_1_names)}

# Creates product category node 1 numerical series
# (an unknown category raises a KeyError)

flipkart_fork_1['product_category_node_1_numerical'] = \
    flipkart_fork_1['product_category_node_1']\
        .apply(lambda category: product_category_node_1_dict[category])

# Displays head of product_category_node_1_numerical

flipkart_fork_1['product_category_node_1_numerical'].head()
Out[47]:
0    0
1    1
2    1
3    0
4    0
Name: product_category_node_1_numerical, dtype: int64

NLP EDA

In [27]:
# Imports spacy english large pipeline

nlp = spacy.load("en_core_web_lg")
In [28]:
# Tokenizes the descriptions
# (lower-cased tokens, punctuation and whitespace tokens dropped)

description_tokens = \
    flipkart_fork_1['description'].map(tokenize_doc_no_punct)

flipkart_fork_1['description_tokenized_no_punct'] = description_tokens

flipkart_fork_1['description_tokenized_no_punct']
Out[28]:
0       [key, features, of, elegance, polyester, multi...
1       [specifications, of, sathiyas, cotton, bath, t...
2       [key, features, of, eurospa, cotton, terry, fa...
3       [key, features, of, santosh, royal, fashion, c...
4       [key, features, of, jaipur, print, cotton, flo...
                              ...                        
1045    [oren, empower, extra, large, self, adhesive, ...
1046    [wallmantra, large, vinyl, sticker, sticker, p...
1047    [buy, uberlyfe, extra, large, pigmented, polyv...
1048    [buy, wallmantra, medium, vinyl, sticker, stic...
1049    [buy, uberlyfe, large, vinyl, sticker, for, rs...
Name: description_tokenized_no_punct, Length: 1050, dtype: object
In [29]:
# Creates the list of the tokenized documents (one token list per
# description): this is the corpus

corpora = flipkart_fork_1['description_tokenized_no_punct'].tolist()

# Flattens the corpus into one single list of tokens

corpora_flat = []

for document_tokens in corpora:
    corpora_flat.extend(document_tokens)

# Computes the word occurrences into a FreqDist object

corpora_frequency = nltk.FreqDist(corpora_flat)
In [30]:
# Plots the frequency of the 50 most frequent words of the corpus

fig, ax = plt.subplots(figsize=(20, 10))

# show=False: the axes is returned so that it can be customized below
ax = corpora_frequency.plot(50,
                            cumulative=False,
                            show=False,
                            color=sns.color_palette('PuOr')[5])

ax.tick_params(labelsize=tick_size - 10)
ax.set_xlabel('Samples', fontsize=text_size)
ax.set_ylabel('Count', fontsize=text_size)

# Title typo fixed ('Freqency' -> 'Frequency')
plt.title('Top 50 Word Frequency Plot',
          fontsize=title_size - 10,
          pad=20)

plt.show();

# Plots Wordcloud (limited to 50 words, built from the same frequencies;
# random_state makes the layout reproducible)

wcloud = \
    WordCloud(colormap=sns.color_palette('PuOr', as_cmap=True),
              background_color='black',
              contour_color='black',
              width=600,
              height=300,
              max_words=50,
              random_state=random_seed)\
    .generate_from_frequencies(corpora_frequency)

fig, ax = plt.subplots(1, 1, figsize=(20, 10))

plt.imshow(wcloud, interpolation='bilinear')
plt.axis("off")
plt.show();
In [31]:
# Displays the 10 first descriptions per categories

for i in flipkart_fork_1['product_category_node_1'].unique():

    # '\033[1m' switches the bold style on; '\033[0m' resets the style so
    # that it cannot leak into whatever is printed next
    print('\033[1m' + i + ":" + '\033[0m')

    # Explicit display(): a bare expression inside a loop is only shown
    # when ast_node_interactivity is set to "all"
    display(
        flipkart_fork_1.loc[
            flipkart_fork_1['product_category_node_1'] == i, 'description']
        .iloc[:10].tolist())
Home Furnishing:
Out[31]:
['Key Features of Elegance Polyester Multicolor Abstract Eyelet Door Curtain Floral Curtain,Elegance Polyester Multicolor Abstract Eyelet Door Curtain (213 cm in Height, Pack of 2) Price: Rs. 899 This curtain enhances the look of the interiors.This curtain is made from 100% high quality polyester fabric.It features an eyelet style stitch with Metal Ring.It makes the room environment romantic and loving.This curtain is ant- wrinkle and anti shrinkage and have elegant apparance.Give your home a bright and modernistic appeal with these designs. The surreal attention is sure to steal hearts. These contemporary eyelet and valance curtains slide smoothly so when you draw them apart first thing in the morning to welcome the bright sun rays you want to wish good morning to the whole world and when you draw them close in the evening, you create the most special moments of joyous beauty given by the soothing prints. Bring home the elegant curtain that softly filters light in your room so that you get the right amount of sunlight.,Specifications of Elegance Polyester Multicolor Abstract Eyelet Door Curtain (213 cm in Height, Pack of 2) General Brand Elegance Designed For Door Type Eyelet Model Name Abstract Polyester Door Curtain Set Of 2 Model ID Duster25 Color Multicolor Dimensions Length 213 cm In the Box Number of Contents in Sales Package Pack of 2 Sales Package 2 Curtains Body & Design Material Polyester',
 'Key Features of SANTOSH ROYAL FASHION Cotton Printed King sized Double Bedsheet Royal Bedsheet Perfact for Wedding & Gifting,Specifications of SANTOSH ROYAL FASHION Cotton Printed King sized Double Bedsheet (1 Bedsheet,2 Pillow Cover, Multicolor) General Brand SANTOSH ROYAL FASHION Machine Washable Yes Type Flat Material Cotton Model Name Gold Design Royal Cotton Printed Wedding & Gifted Double Bedsheet With 2 Pillow cover Model ID goldbedi-38 Color Multicolor Size King Fabric Care Machine Wash, Do Not Bleach Dimensions Flat Sheet Width 90 inch / 230 cm Fitted Sheet Width 228 cm Pillow Cover Width 16 inch / 43 cm Pillow Cover Length 28 inch / 72 cm Fitted Sheet Depth 280 cm Fitted Sheet Length 278 cm Flat Sheet Depth 282 cm Flat Sheet Length 110 inch / 280 cm In the Box Number of Contents in Sales Package 1 Sales Package 1 Bedsheet,2 Pillow Cover',
 'Key Features of Jaipur Print Cotton Floral King sized Double Bedsheet 100% cotton,Jaipur Print Cotton Floral King sized Double Bedsheet (1 bed sheet 2 pillow cover, White) Price: Rs. 998 This nice bed sheet made up of 100% cotton to give you comfort. This bed sheet is hand printed. This bedsheet gives nice look to your room And its fast colour and good quality gives this bedsheet long life.,Specifications of Jaipur Print Cotton Floral King sized Double Bedsheet (1 bed sheet 2 pillow cover, White) General Machine Washable Yes Brand Jaipur Print Type Flat Model Name jaipur117 Material Cotton Thread Count 140 Model ID jaipur117 Fabric Care machinewash, do not bleach Size King Color White Warranty waranty of the product only for manufacturing defect only and product will exchange onle when it is not used and returne its origional packing Dimensions Flat Sheet Width 86 inch / 220 cm Fitted Sheet Width 0 cm Pillow Cover Width 17 inch / 45 cm Pillow Cover Length 29 inch / 75 cm Weight 900 g Fitted Sheet Depth 0 cm Fitted Sheet Length 0 cm Flat Sheet Depth 0.2 cm Flat Sheet Length 104 inch / 265 cm In the Box Number of Contents in Sales Package 1 Sales Package 1 bed sheet 2 pillow cover',
 'Key Features of SANTOSH ROYAL FASHION Cotton Embroidered Diwan Set Color: Multicolor No of Contents: 8 Diwan Sheet Length225 cm Cushion Cover Length:40 cm,SANTOSH ROYAL FASHION Cotton Embroidered Diwan Set Price: Rs. 979 Add oodles of style to your home with an exciting range of designer furniture, furnishings, decor items and kitchenware. We promise to deliver best quality products at best prices.,Specifications of SANTOSH ROYAL FASHION Cotton Embroidered Diwan Set General Brand SANTOSH ROYAL FASHION Material Cotton Pattern Embroidered Style Code dsnsan-34 Color Multicolor Dimensions Bolster Cover Length 23 inch / 40 cm Cushion Cover Width 15 inch / 40 cm Diwan Sheet Length 88 inch / 225 cm Cushion Cover Length 15 inch / 40 cm Diwan Sheet Width 59 inch / 150 cm In the Box Number of Contents in Sales Package Pack of 8 Sales Package 1 Diwan Sheet,5 Cushion Covers, 2 Bolster Covers Additional Features Fabric Care Machine Washable, Do Not Soak',
 'Key Features of House This Queen Cotton Duvet Cover Material:100% Cotton Outer.Inner Polyfill Thrade :210 Dimension: Duvet Cover 229X274 Cms 1 Double Duvet Cover,Specifications of House This Queen Cotton Duvet Cover (Grey) General Brand House This Closure Button Design Code P21821 Material Cotton Pattern Printed Thread Count 210 Style Code Dco-Smart Stripe-Black Grey Size Queen Color Grey Dimensions Length 107 inch / 274 cm Width 90 inch / 229 cm In the Box Number of Contents in Sales Package Pack of 1 Fabric Care Machine Washable, Do Not Soak Additional Features Reversible No',
 'Buy Riva Carpets Cotton Free Bath Mat Classic Loop Shag Bathmat_RI-527 at Rs. 1799 at Flipkart.com. Only Genuine Products. Free Shipping. Cash On Delivery!',
 'Key Features of JMD Printed Cushions Cover Printed 40x40cm size Made of Cotton,Specifications of JMD Printed Cushions Cover (Pack of 2, 40 cm*40 cm, Multicolor) General Brand JMD Suitable For Cushions Design Code C03 Type Square Material Cotton Style Code C03 Pattern Printed Color Multicolor Dimensions Height 15 inch / 40 cm Width 15 inch / 40 cm Additional Features Reversible No In the Box Number of Contents in Sales Package Pack of 2 Sales Package Cushion Pillow Cover',
 "Key Features of Kripa's Printed Cushions Cover Length 45 cm Width 45 cm,Specifications of Kripa's Printed Cushions Cover (Pack of 2, 45 cm*45 cm, Grey) General Brand Kripa's Suitable For Cushions Design Code CC-MH-02 Type Square Material Cotton Style Code CC-MH-02 Pattern Printed Color Grey Dimensions Height 17 inch / 45 cm Width 17 inch / 45 cm Additional Features Reversible Yes In the Box Number of Contents in Sales Package Pack of 2",
 'Key Features of Prime Printed 4 Seater Table Cover Length 60 inch/152 cm Width 40 inch/101 cm,Prime Printed 4 Seater Table Cover (Multicolor, PVC) Price: Rs. 499 Prime Center Table Cover Printed 4 Seater,Specifications of Prime Printed 4 Seater Table Cover (Multicolor, PVC) In The Box Number of Contents in Sales Package Pack of 1 General Brand Prime Type Table Cover Model Name 0.281 Material PVC Model ID 281 Color Multicolor Dimensions Weight 250 g Length 60 inch / 152 cm Width 40 inch / 101 cm Seating Capacity 4 Seater',
 'Key Features of Prime Printed 8 Seater Table Cover Length 90 inch/228 cm Width 60 inch/152 cm,Prime Printed 8 Seater Table Cover (Multicolor, PVC) Price: Rs. 899 Prime Dining Table Cover Printed 8 Seater,Specifications of Prime Printed 8 Seater Table Cover (Multicolor, PVC) In The Box Number of Contents in Sales Package Pack of 1 General Brand Prime Type Table Cover Model Name 290P Material PVC Model ID 290 Color Multicolor Dimensions Weight 550 g Length 90 inch / 228 cm Width 60 inch / 152 cm Seating Capacity 8 Seater']
Baby Care:
Out[31]:
['Specifications of Sathiyas Cotton Bath Towel (3 Bath Towel, Red, Yellow, Blue) Bath Towel Features Machine Washable Yes Material Cotton Design Self Design General Brand Sathiyas Type Bath Towel GSM 500 Model Name Sathiyas cotton bath towel Ideal For Men, Women, Boys, Girls Model ID asvtwl322 Color Red, Yellow, Blue Size Mediam Dimensions Length 30 inch Width 60 inch In the Box Number of Contents in Sales Package 3 Sales Package 3 Bath Towel',
 'Key Features of Eurospa Cotton Terry Face Towel Set Size: small Height: 9 inch GSM: 360,Eurospa Cotton Terry Face Towel Set (20 PIECE FACE TOWEL SET, Assorted) Price: Rs. 299 Eurospa brings to you an exclusively designed, 100% soft cotton towels of export quality. All our products have soft texture that takes care of your skin and gives you that enriched feeling you deserve. Eurospa has been exporting its bath towels to lot of renowned brands for last 10 years and is famous for its fine prints, absorbency, softness and durability. NOTE: Our product is 100% cotton, so it is susceptible to shrinkage. Product color may vary from the picture. Size may vary by ±3% WASH CARE: Wash in cold Water, Do not Iron, Do not Bleach, Flat dry, Wash before first use. SIZE- FACE TOWEL - 23 cms X 23 cms.,Specifications of Eurospa Cotton Terry Face Towel Set (20 PIECE FACE TOWEL SET, Assorted) Bath Towel Features Material Cotton Terry Design SHUVAM General Brand Eurospa GSM 360 Type Face Towel Set Model Name SHUVAM20PCFTSETASSORTED Ideal For Boys, Girls, Men, Women Model ID SHUVAM20PCFTSETASSORTED Size small Color Assorted Dimensions Weight 350 g Length 9 inch Width 9 inch In the Box Number of Contents in Sales Package 20 Sales Package 20 PIECE FACE TOWEL SET',
 "Key Features of Mom and Kid Baby Girl's Printed Green Top & Pyjama Set Fabric: Cotton Brand Color: Green,Mom and Kid Baby Girl's Printed Green Top & Pyjama Set Price: Rs. 309 Girls Pyjama set,Specifications of Mom and Kid Baby Girl's Printed Green Top & Pyjama Set General Details Pattern Printed Ideal For Baby Girl's Night Suit Details Fabric Cotton Type Top & Pyjama Set Neck Round Neck In the Box 1 Top & Pyjama Set",
 "Key Features of Mom and Kid Baby Girl's Printed Blue, Grey Top & Pyjama Set Fabric: Cotton Brand Color: Blue, Grey,Mom and Kid Baby Girl's Printed Blue, Grey Top & Pyjama Set Price: Rs. 309 Girls Pyjamaset,Specifications of Mom and Kid Baby Girl's Printed Blue, Grey Top & Pyjama Set General Details Pattern Printed Ideal For Baby Girl's Night Suit Details Number of Contents in Sales Package Pack of 1 Fabric Cotton Type Top & Pyjama Set Neck Round Nack In the Box 1 Top & Pyjama Set",
 'Key Features of KOHL Wine Bag Yellow Easy to carry GiftBags JuteBags Bits&Bobs,Specifications of KOHL Wine Bag Yellow (Yellow) General Brand KOHL Insulated No Type Bottle Cover Series Wine Bags Material Jute Washable Yes Model Name Wine Bag Yellow Ideal For Boys, Girls Model ID 3 Color Yellow Size Large In the Box Number of Contents in Sales Package 1 Sales Package Bottle Cover Additional Features Easy to carry, GiftBags, JuteBags, Bits&Bobs',
 'Specifications of CHHOTE JANAB COZY MATTRESS PROTECTOR(SET OF 2) (MULTI) Baby Mattress Features Material COTTON General Brand CHHOTE JANAB Type WATERPROOF Model Name COZY MATTRESS PROTECTOR(SET OF 2) Pattern PLAIN Model ID ED104 Color MULTI Additional Features SOFT AND COMFORTABLE, WATERPROOF, SET OF 2, COLOUR MAY BE VARY. In the Box Sales Package 2 MATTRESS',
 "Specifications of Vitamins Solid Baby Girl's Basic Shorts Shorts Details Number of Contents in Sales Package Pack of 1 Fabric Cotton Type Basic Shorts General Details Pattern Solid Occasion Casual Ideal For Baby Girl's In the Box 1 SHORT Additional Details Style Code 05TG-870-20-N.BLUE Fabric Care Gentle Machine Wash in Lukewarm Water, Do Not Bleach",
 "Key Features of Vitamins Graphic Print Baby Girl's Basic Shorts Cotton ANTRA MELANGE,Specifications of Vitamins Graphic Print Baby Girl's Basic Shorts General Details Ideal For Baby Girl's Occasion Casual Pattern Graphic Print Shorts Details Type Basic Shorts Fabric Cotton Number of Contents in Sales Package Pack of 1 Fabric Care Gentle Machine Wash in Lukewarm Water, Do Not Bleach Additional Details Style Code 05TG-870-26-ANTRA MELANGE In the Box 1 SHORTS",
 "Specifications of Vitamins Embroidered Baby Girl's Denim Shorts General Details Ideal For Baby Girl's Occasion Casual Pattern Embroidered Shorts Details Type Denim Shorts Fabric Cotton Pockets Mitered Patch Pocket on Thigh Number of Contents in Sales Package Pack of 1 Fabric Care Gentle Machine Wash in Lukewarm Water, Do Not Bleach Additional Details Style Code 05TG-166-24-RAW RANI In the Box 1 SHORTS",
 "Key Features of florence9 baby cap red checks Cap red checks Cotton,Specifications of florence9 baby cap red checks Cap Cap Details Number of Contents in Sales Package Pack of 1 Fabric Cotton Type baby cap red checks General Details Occasion Casual Ideal For Baby Boy's, Baby Girl's Additional Details Style Code baby cap red checks"]
Watches:
Out[31]:
['Maserati Time R8851116001 Analog Watch  - For Boys - Buy Maserati Time R8851116001 Analog Watch  - For Boys  R8851116001 Online at Rs.24400 in India Only at Flipkart.com. - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'Camerii WM64 Elegance Analog Watch  - For Men, Boys - Buy Camerii WM64 Elegance Analog Watch  - For Men, Boys  WM64 Online at Rs.449 in India Only at Flipkart.com. - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'T STAR UFT-TSW-005-BK-BR Analog Watch  - For Boys\r\n                         Price: Rs. 399\r\n\t\t\t\t\r\n\t\t\tWhether you are on your way to work or travelling abroad with family, lifestyle accessories like watches, wallets and belts help to add a touch of sophistication and class to your otherwise mundane and regular daily wear. When it all comes down to it, suave leather belts and intricately designed and finished timepieces are what separate you from the rest.\r\nWhether you are on your way to work or travelling abroad with family, lifestyle accessories like watches, wallets and belts help to add a touch of sophistication and class to your otherwise mundane and regular daily wear. When it all comes down to it, suave leather belts and intricately designed and finished timepieces are what separate you from the rest.',
 "Alfajr WY16B Youth Digital Watch  - For Men, Boys - Buy Alfajr WY16B Youth Digital Watch  - For Men, Boys  WY16B Online at Rs.4995 in India Only at Flipkart.com. Azan Times ( For All Cities Around The World ), Prayer Reminders, Qibla Direction, Hijri And Gregorian Calnedars, Qur'An Bookmark, Daily Alrams - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!",
 'TAG Heuer CAU1116.BA0858 Formula 1 Analog Watch  - For Boys, Men - Buy TAG Heuer CAU1116.BA0858 Formula 1 Analog Watch  - For Boys, Men  CAU1116.BA0858 Online at Rs.107750 in India Only at Flipkart.com. Swiss-made, Quartz movement, Scratch resistant, Sapphire crystal glass, Water resistant to 200 metres, Feature Chronograph - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'Calibro SW-125 Analog-Digital Watch  - For Men, Boys\r\n                         Price: Rs. 699\r\n\t\t\t\t\r\n\t\t\tCALIBRO presents MTG Black Dial Round Watch. This is a fashionable watch from MTG that adds to your style statement. This brand is known for its usage of quality material in making appealing watches. It understands the needs of youth so well that it is able to present wide range of watches to its customers. The variety in design, style and youthful approach is very well executed by this brand. Get one and see how brilliantly it leads other brands in style, finish and everything else that matters.\r\nCALIBRO presents MTG Black Dial Round Watch. This is a fashionable watch from MTG that adds to your style statement. This brand is known for its usage of quality material in making appealing watches. It understands the needs of youth so well that it is able to present wide range of watches to its customers. The variety in design, style and youthful approach is very well executed by this brand. Get one and see how brilliantly it leads other brands in style, finish and everything else that matters.',
 'Cartier W6701005 Analog Watch  - For Boys, Men - Buy Cartier W6701005 Analog Watch  - For Boys, Men  W6701005 Online at Rs.201000 in India Only at Flipkart.com. - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'Lois Caron LCS-4032 Analog Watch  - For Boys, Men - Buy Lois Caron LCS-4032 Analog Watch  - For Boys, Men  LCS-4032 Online at Rs.399 in India Only at Flipkart.com. - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'Titan 1639SL03 Analog Watch  - For Boys, Men - Buy Titan 1639SL03 Analog Watch  - For Boys, Men  1639SL03 Online at Rs.1695 in India Only at Flipkart.com. - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!',
 'Srushti Art Jewelry Megnet_Led_Sport_BlackRed1 Digital Watch  - For Men, Women, Boys, Girls - Buy Srushti Art Jewelry Megnet_Led_Sport_BlackRed1 Digital Watch  - For Men, Women, Boys, Girls  Megnet_Led_Sport_BlackRed1 Online at Rs.200 in India Only at Flipkart.com. Led Watch, Sports Led, Megnet watch, Fresh arrival, latest, fancy, Srushti, GirlsWatch, Men, Women, Boys, Wrist Watches - Great Discounts, Only Genuine Products, 30 Day Replacement Guarantee, Free Shipping. Cash On Delivery!']
Home Decor & Festive Needs:
Out[31]:
['Myesquire Ceramic Burner Pot Lemongrass Liquid Air Freshener (30 ml)\r\n                         Price: Rs. 245\r\n\t\t\t\t\r\n\t\t\tIlluminate Every Corner Of Your Living Room In Style With This Home Fragrance Electric Burner Combo With Fragrance That Is Brought To You By Myesquire. This Is A Very Unique, Useful And Value for Money Product For Home Decoration And Fragrance. This Unique Combo Contains Aroma Burner With All Accessories and is ready to use . Put A Small Amount 1-2 Tea Spoon Of Aroma Oil On The Bowl Surface And Light The Tea Light and After Few Minutes As The Oil Heats Up, The Fragrance Will Spread In The Room. And You Will Be All Set To Enjoy The Captivating Aroma. Keep It Away From Children As Surface Becomes Hot.\r\nIlluminate Every Corner Of Your Living Room In Style With This Home Fragrance Electric Burner Combo With Fragrance That Is Brought To You By Myesquire. This Is A Very Unique, Useful And Value for Money Product For Home Decoration And Fragrance. This Unique Combo Contains Aroma Burner With All Accessories and is ready to use . Put A Small Amount 1-2 Tea Spoon Of Aroma Oil On The Bowl Surface And Light The Tea Light and After Few Minutes As The Oil Heats Up, The Fragrance Will Spread In The Room. And You Will Be All Set To Enjoy The Captivating Aroma. Keep It Away From Children As Surface Becomes Hot.',
 'Key Features of VarEesha Wooden Key Holder Hand made using wrought iron and mango wood Color - Brown Material - Sheesham wood Care instructions : Clean with a wet cloth and wipe gently.,VarEesha Wooden Key Holder (8 Hooks, Multicolor) Price: Rs. 1,199 Traditional design combined with utility, this wood and iron designed keybox is a convenient place to store all your keys. The iron design has a glass back to protect from dust and makes this box very manageable. Has 6 hooks inside for your keys which are less frequently used and 2 bigger hooks outside for car keys and other bigger keys which are used daily.,Specifications of VarEesha Wooden Key Holder (8 Hooks, Multicolor) General Brand VarEesha Model Number VSSWH001 Material Wooden Number of Hooks 8 Color Multicolor In the box Sales Package 1 KeyHanger Pack of 1 Dimensions Weight 900 g Other Dimensions L*B*H-20x10x35 cms',
 'Key Features of Treasure Showpiece  -  23 cm Polyresin Height - 23 cm Width - 1 cm,Specifications of Treasure Showpiece  -  23 cm (Polyresin, Blue) General Brand Treasure Model Number TE6 Type Fengshui Material Polyresin Color Blue Dimensions Height 23 cm Width 1 cm Depth 1 cm In the Box Sales Package 1 show piece',
 'Specifications of @home Glass Vase (18 inch, Grey) General Brand @home Suitable For Table Model Number HGFVSCGSTGRY00094 Type Garden Decor Material Glass Color Grey Dimensions Height 18 inch Width 18 inch Depth 42 inch In the Box Sales Package 1 Vase Pack of 1',
 "Key Features of Smart Wall Guru Medium Adhesive Sticker We Showcase A Variety Of Wall Decal Categories Study room Looking Awesome Living room Bath room Attractive Guest room Kitchen Easy to Install Easy to Remove Easy To Clean All Wall Decal are front Matte Finish, water resistant & packaged between two layers.,Smart Wall Guru Medium Adhesive Sticker (Pack of 1) Price: Rs. 299 'Smart Wall Guru'' gives u a pure beautiful house with our decoration wall stickers,wall decals & all kind of designs for your home decoration.These stickers are made of self adhesive PVC vinyl and very easy to apply on plane surface. These are the great suppliment of wallpaper,wall color and another decoration method. Thnkss…,Specifications of Smart Wall Guru Medium Adhesive Sticker (Pack of 1) In The Box Number of Contents in Sales Package Pack of 1 Sales Package 1 Sticker General Ideal Use Home, Child Bedroom, Bedroom Scratch-resistant Yes Brand Smart Wall Guru Laminated Yes Type Adhesive Size in Number 58.43 cm Number of Stickers 1 Material Self Adhesive PVC Vinyl Size Medium Dimensions Height 43 cm Width 58 cm",
 'Parv Collections Showpiece  -  12 cm (Polyresin, Multicolor)\r\n                         Price: Rs. 1,085\r\n\t\t\t\t\r\n\t\t\tLovely Sitting Couple Statue\r\nLovely Sitting Couple Statue',
 'Buy Exotic India Adi Buddha Vajrasattva Showpiece  -  27.94 cm for Rs.21800 online. Exotic India Adi Buddha Vajrasattva Showpiece  -  27.94 cm at best prices with FREE shipping & cash on delivery. Only Genuine Products. 30 Day Replacement Guarantee.',
 'Buy Aapno Rajasthan Princess Design Showpiece  -  22 cm for Rs.999 online. Aapno Rajasthan Princess Design Showpiece  -  22 cm at best prices with FREE shipping & cash on delivery. Only Genuine Products. 30 Day Replacement Guarantee.',
 'Buy Exotic India Vaishnava Symbols - Shankha And Chakra Showpiece  -  37 cm for Rs.6655 online. Exotic India Vaishnava Symbols - Shankha And Chakra Showpiece  -  37 cm at best prices with FREE shipping & cash on delivery. Only Genuine Products. 30 Day Replacement Guarantee.',
 'Buy Exotic India Relaxing Ganesha Showpiece  -  11.43 cm for Rs.10890 online. Exotic India Relaxing Ganesha Showpiece  -  11.43 cm at best prices with FREE shipping & cash on delivery. Only Genuine Products. 30 Day Replacement Guarantee.']
Kitchen & Dining:
Out[31]:
['Key Features of Dungri India Craft Ducjug010-ducgl012-2 Jug Glass Set Jug: Height- 8 inches, Dia - 4.2 inches, Depth 7.5 inches, Width 7 inches,capacity 1744 ml, Weight - 430 Grams,Dungri India Craft Ducjug010-ducgl012-2 Jug Glass Set (Copper) Price: Rs. 1,399 Copper Utensils And Serving Copper Ware Helped To Prevent The Spread Of Diseases. In Many Tests It Showed That 99.9% Of The Bacteria On Copper Alloy Surfaces (With 65% Or Greater Copper Content) Were Eliminated Within 2 Hours Of Exposure. According To Research At Southampton University In The U.K., Mrsa Microbes Remain Alive On Stainless Steel Surfaces For Up To Three Days, Whereas The Same Microbes On A Copper Surface Are Eliminated Within 90 Minutes. These Copper Serving Ware Are Preferred Because It Is Durable And It Has Excellent Characteristics. The Ageless Use Of Copper Bears Testimony To Its Myriad Virtues And Everlasting Appeal. Its Therapeutic Value Has Been Handed Down To Us Through The Ages. In Fact, Water Stored In Copper Ware Is Virtually The Elixir Of Life. Purified, It Helps To Regulate Digestion And Cardiac Orders, The Basis Of Good Health, As Practiced By Ayurveda And Rishits From Old Vedic Times.   Care Tips:  * Clean Your Copperware By Hand And Avoid Dishwasher As The Detergent Can Cause Oxidation Leading To Permanat Damage. * Use A Solution Of Equal Quantities Vinegar Or Lemon Juice And Salt Diluted With Some Water At Such Times But Don\'T Use It Regularly. * Avoid Scratchy Cleaners Like Steel Wool. 
* A Simple, Soft Cleaning Liquid On Soap Base, Hot Water And A Soft Washing Cloth Is Best For Daily Use.,Specifications of Dungri India Craft Ducjug010-ducgl012-2 Jug Glass Set (Copper) General Brand Dungri India Craft Number of Glasses 2 Model Number Ducjug010-ducgl012-2 Type Jug Glass Set Glass Shape Tumblers Material Copper Capacity per Glass 320 ml Jug Capacity 1.7 L Color Gold Dimensions Jug Height 203 mm Glass Diameter 50 mm Jug Depth 190 mm Other Dimensions Glass: Dia - 2", Height - 4", capacity - 320 ml, Weight - 75 grams In the Box 1 Jug 2 Glass Set Additional Features Microwave Safe No Care Instructions For Better Washing Result Wash With Pitambari Cleaning Power',
 'Key Features of BFT 6 W LED Bulb Pack of 1 Bulb,BFT 6 W LED Bulb (White) Price: Rs. 350 Introducing new technology in led lighting LED filament. Best suited for chandeliers table lamps and light fixtures in general as it spreads light 360-degrees,Specifications of BFT 6 W LED Bulb (White) General Brand BFT Model Number BFTCW6 Body and Design Features Material Glass Bulb Base B22 Lighting Features Light Color White Bulb Type LED Lumen 700 Power Features Power Consumption 6 W In the Box Sales Package 1 Bulb Pack of 1',
 'Key Features of AKUP i-loves-music Ceramic Mug Pack of 1 Mug,AKUP i-loves-music Ceramic Mug (300 ml) Price: Rs. 259 AKUP Ceramic coffee mugs gives you the best quality printed mugs which you can gift to your loved ones. We provide good quality of products with packing. Coffee mugs made of ceramic clay with the volume capacity of 300ml each. You can buy it for yourself to make your moment unforgettable forever. Specifications Product Dimensions: Mugs - Height: 9.5 cms, Diameter: 8 cms. Item Type: Coffee Mug Color: Multi-Color Material: Clay Finish: Glossy Specialty: Lovely Style Wash care: Remove dirt by using a soft fabric to clean and then wipe with a moist cloth. Disclaimer: The fine design, pattern and color tone of the product may vary slightly from that shown in the image. However, there would not be any compromise in quality. Brand: AKUP designs & co,Specifications of AKUP i-loves-music Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe No Dimensions Diameter 8 mm Weight 330 g Height 9.5 mm Depth 8 mm In the Box Sales Package 1 Mug',
 'Key Features of AKUP i-love-my-mom Ceramic Mug Pack of 1 Mug,AKUP i-love-my-mom Ceramic Mug (300 ml) Price: Rs. 259 AKUP Ceramic coffee mugs gives you the best quality printed mugs which you can gift to your loved ones. We provide good quality of products with packing. Coffee mugs made of ceramic clay with the volume capacity of 300ml each. You can buy it for yourself to make your moment unforgettable forever. Specifications Product Dimensions: Mugs - Height: 9.5 cms, Diameter: 8 cms. Item Type: Coffee Mug Color: Multi-Color Material: Clay Finish: Glossy Specialty: Lovely Style Wash care: Remove dirt by using a soft fabric to clean and then wipe with a moist cloth. Disclaimer: The fine design, pattern and color tone of the product may vary slightly from that shown in the image. However, there would not be any compromise in quality. Brand: AKUP designs & co,Specifications of AKUP i-love-my-mom Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe No Dimensions Diameter 8 mm Weight 330 g Height 9.5 mm Depth 8 mm In the Box Sales Package 1 Mug',
 'Key Features of AKUP keep-calm Ceramic Mug Pack of 1 Mug,AKUP keep-calm Ceramic Mug (300 ml) Price: Rs. 259 AKUP Ceramic coffee mugs gives you the best quality printed mugs which you can gift to your loved ones. We provide good quality of products with packing. Coffee mugs made of ceramic clay with the volume capacity of 300ml each. You can buy it for yourself to make your moment unforgettable forever. Specifications Product Dimensions: Mugs - Height: 9.5 cms, Diameter: 8 cms. Item Type: Coffee Mug Color: Multi-Color Material: Clay Finish: Glossy Specialty: Lovely Style Wash care: Remove dirt by using a soft fabric to clean and then wipe with a moist cloth. Disclaimer: The fine design, pattern and color tone of the product may vary slightly from that shown in the image. However, there would not be any compromise in quality. Brand: AKUP designs & co,Specifications of AKUP keep-calm Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe No Dimensions Diameter 8 mm Weight 330 g Height 9.5 mm Depth 8 mm In the Box Sales Package 1 Mug',
 'Key Features of AKUP league-of-legends Ceramic Mug Pack of 1 Mug,AKUP league-of-legends Ceramic Mug (300 ml) Price: Rs. 259 AKUP Ceramic coffee mugs gives you the best quality printed mugs which you can gift to your loved ones. We provide good quality of products with packing. Coffee mugs made of ceramic clay with the volume capacity of 300ml each. You can buy it for yourself to make your moment unforgettable forever. Specifications Product Dimensions: Mugs - Height: 9.5 cms, Diameter: 8 cms. Item Type: Coffee Mug Color: Multi-Color Material: Clay Finish: Glossy Specialty: Lovely Style Wash care: Remove dirt by using a soft fabric to clean and then wipe with a moist cloth. Disclaimer: The fine design, pattern and color tone of the product may vary slightly from that shown in the image. However, there would not be any compromise in quality. Brand: AKUP designs & co,Specifications of AKUP league-of-legends Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe No Dimensions Diameter 8 mm Weight 330 g Height 9.5 mm Depth 8 mm In the Box Sales Package 1 Mug',
 'Key Features of AKUP life-is-not-living Ceramic Mug Pack of 1 Mug,AKUP life-is-not-living Ceramic Mug (300 ml) Price: Rs. 259 AKUP Ceramic coffee mugs gives you the best quality printed mugs which you can gift to your loved ones. We provide good quality of products with packing. Coffee mugs made of ceramic clay with the volume capacity of 300ml each. You can buy it for yourself to make your moment unforgettable forever. Specifications Product Dimensions: Mugs - Height: 9.5 cms, Diameter: 8 cms. Item Type: Coffee Mug Color: Multi-Color Material: Clay Finish: Glossy Specialty: Lovely Style Wash care: Remove dirt by using a soft fabric to clean and then wipe with a moist cloth. Disclaimer: The fine design, pattern and color tone of the product may vary slightly from that shown in the image. However, there would not be any compromise in quality. Brand: AKUP designs & co,Specifications of AKUP life-is-not-living Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe No Dimensions Diameter 8 mm Weight 330 g Height 9.5 mm Depth 8 mm In the Box Sales Package 1 Mug',
 'Key Features of Cookart Elegant S.S. Handi With Hot Pot Pack of 4 Casserole Set Pack of 4 Cook and Serve Casserole,Cookart Elegant S.S. Handi With Hot Pot Pack of 4 Casserole Set (1500 ml, 2.25 L) Price: Rs. 699 Cookart introduces a perfect fusion of style and durability, this 4 piece stainless steel casserole & handi set will be a perfect addition to your kitchen. This set is made of the finest quality stainless steel material. The high quality material also take care of your health and hygiene. Handi Capacity: 0.500 ML, 0.750ML, 1 L,Specifications of Cookart Elegant S.S. Handi With Hot Pot Pack of 4 Casserole Set (1500 ml, 2.25 L) Care and Use Features Microwave Safe No General Brand Cookart Casserole Type Cook and Serve Casserole Model Number ECH1 Type Casserole Set Model Name Elegant S.S. Handi With Hot Pot Material Stainless Steel Capacity 1500 ml, 2.25 L Color Silver Body and Design Features Lid Included Yes Non-stick Coating No In the box Sales Package 1 Casserole, 3 Handi Pack of 4',
 'Specifications of Shopmania Happy Birthday Gift For Starting Letter Q Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe Yes Dimensions Diameter 80 mm Weight 400 g Height 95 mm Width 120 mm Depth 80 mm In the Box Sales Package 1 Mug',
 'Key Features of Shopmania Happy Birthday Gift For Starting Letter G Ceramic Mug Pack of 1 Mug,Specifications of Shopmania Happy Birthday Gift For Starting Letter G Ceramic Mug (300 ml) General Type Mug Mug Capacity 300 ml Material Ceramic Freezer Safe Yes Microwave Safe Yes Dimensions Diameter 80 mm Weight 400 g Height 95 mm Width 120 mm Depth 80 mm In the Box Sales Package 1 Mug']
Beauty and Personal Care:
Out[31]:
['Key Features of Burt s Bees Hair Repair Shea And Grapefruit Deep Conditioner Ideal For: Boys, Girls Applied For: Deep Conditioning Hair Condition: Damaged Hair Hair Type: Dry Hair,Specifications of Burt s Bees Hair Repair Shea And Grapefruit Deep Conditioner (147.5 ml) Conditioner Traits Hair Condition Damaged Hair Hair Type Dry Hair Applied For Deep Conditioning Ideal For Boys, Girls General Traits Brand Burt s Bees Quantity 147.5 ml Model Name Hair Repair Shea And Grapefruit Deep Conditioner In the Box 1 Conditioner',
 'Key Features of Suave Naturals Everlasting Sunshine Conditioner Ideal For: Boys, Girls Applied For: Conditioning Hair Condition: Damaged Hair Hair Type: Dry Hair,Specifications of Suave Naturals Everlasting Sunshine Conditioner (665 ml) Conditioner Traits Hair Condition Damaged Hair Hair Type Dry Hair Applied For Conditioning Ideal For Boys, Girls General Traits Brand Suave Quantity 665 ml Model Name Naturals Everlasting Sunshine Conditioner In the Box 1 Conditioner',
 'Buy Borse N16 Make Up And Jewellery Vanity Case for Rs.680 online. Borse N16 Make Up And Jewellery Vanity Case at best prices with FREE shipping & cash on delivery. Only Genuine Products. 30 Day Replacement Guarantee.',
 'Specifications of RYTHMX FB NAIL POLISHES BLACK LIPSTICKS IMPORTANT COMBO 51 (Set of 4) Combo Set Details Number of Contents in Combo Set 4 Combo Set Contents 2 Nail polish, 2 LIPSTICKS Organic Yes Ideal For Women General Traits Professional Care Yes Warranty After use Lipsticks No Exchange / No Return',
 'Key Features of RYTHMX FB NAIL POLISHES BLACK LIPSTICKS IMPORTANT COMBO 60 12 Hours Stay Made in India Soft & Smooth Creamy Matte,RYTHMX FB NAIL POLISHES BLACK LIPSTICKS IMPORTANT COMBO 60 (Set of 4) Price: Rs. 399 Rythmx a best Quality Intense fashionable color which comes in chic to vivid shades.Its ultra-creamy texture glides smoothly , covering the creases.,Specifications of RYTHMX FB NAIL POLISHES BLACK LIPSTICKS IMPORTANT COMBO 60 (Set of 4) Combo Set Details Number of Contents in Combo Set 4 Combo Set Contents 2 Nail polish, 2 LIPSTICKS Organic Yes Ideal For Women General Traits Professional Care Yes Warranty After use Lipsticks No Exchange / No Return',
 'Specifications of Brillare Science Dandruff Control Shampoo & Intenso Creme Combo (Set of) Combo Set Details Combo Set Contents 1 Dandruff Control Shampoo 150ml, 1 Dandruff Control Intenso Creme 125g Ideal For Women, Men Organic No General Traits Professional Care Yes',
 'Mars Queen Eye Liner, Lip Gloss, EyeLiner (Set of 3) Price: Rs. 299 1 Queen Eye Liner, 1 Lip Gloss, 1 EyeLiner,Specifications of Mars Queen Eye Liner, Lip Gloss, EyeLiner (Set of 3) Combo Set Details Number of Contents in Combo Set 3 Combo Set Contents 1 Queen Eye Liner, 1 Lip Gloss, 1 EyeLiner Ideal For Women Organic No General Traits Professional Care Yes Gift Pack Yes',
 'Specifications of RYTHMX FB NAIL POLISHES BLACK LIPSTICKS IMPORTANT COMBO 47 (Set of 4) Combo Set Details Number of Contents in Combo Set 4 Combo Set Contents 2 Nail polish, 2 LIPSTICKS Organic Yes Ideal For Women General Traits Professional Care Yes Warranty After use Lipsticks No Exchange / No Return',
 'Specifications of Aroma Care Intense Color Nail Polish Combo 175057 49.5 ml (Multicolor,) Nail Polish Traits Quantity 49.5 ml Shade Multicolor, Organic Yes Warranty After Use No Exchange No Return Additional Traits No Exchange. No Return. No Warranty. Color Fidelity:The colors displayed here are indicative and may not be precise representations of actual shades due to variance in monitor calibrations and resolution as well as screen settings. In the Box 5 Nail Polishs',
 'Specifications of KE GIFT BOX STOR JEWELLARY Vanity Jewellery (BLUE) Additional Features GIFT BOX Body Features Body Material WOOD, ALUMINIUM Number of Drawers 1 Drawer Number of Compartments 1 Compartment In the Box Sales Package GIFT BOX']
Computers:
Out[31]:
['Key Features of D-Link DAP1320 QRS app(iOS,Android),Specifications of D-Link DAP1320 (White) Speed Wireless Speed 300 Mbps DSL Modem Speed 300 Mbps General Brand D-Link In The Box Extender Plug Model DAP1320 Type Range Extenders/Repeaters Color White Warranty Covered in Warranty No damage Warranty Summary 3years Service Type No damage Not Covered in Warranty damaged will not repair Dimensions Weight 69 g Connectivity Number of Antennae 2 Number of USB Ports 0 Antennae Internal Supported Protocols Standard IEEE IEEE 802.11n/g/b Operating Conditions Operating Humidity Operating: 10% to 90% non-condensing // Storage: 5% to 95% non-condensing Temperature Operating: 0 to 40 °C (32 to 104 °F)//Storage: -20 to 65 °C (-4 to 149 °F) degree C Additional Features Certification FCC, IC, CE',
 "Key Features of RoQ Slim Multimedia 105key Flexible Wired USB Flexible Keyboard Size: Handheld,RoQ Slim Multimedia 105key Flexible Wired USB Flexible Keyboard (White) Price: Rs. 399 Working Environment. It Fits Easily In Your Briefcase Or Backpack. The Keyboard Can Be Cleaned With Water, Alcohol Or Disinfectant To Help Prevent The Spread Of The Disease And Germs. It Is Suitable For Public Place Use Such As School, Restaurant, Or Hospital Where Clean Keyboard Is A Requirement. This Sealed Keyboard Can Be Easily Cleaned, Also Good For Use In Dusty Area Such As Factory, Or Outdoor. Quiet Keystroke Is A Good Match For Library Use. If You Are Not Used To This Keyboard, You May Push These Keys A Tiny Bit Harder To Get Output. Dustproof Washable Usb Plug Weight: 0.45lbs (200g) Operation Temperature Is -20c- +60c. Storage Temperature Is -30c - +70c,Specifications of RoQ Slim Multimedia 105key Flexible Wired USB Flexible Keyboard (White) General Specifications Interface Wired USB Brand RoQ Model Slim Multimedia 105key Flexible Sales Package 1 keyboard Type Flexible Keyboard Model Name Slim Multimedia Flexible Keyboard Color White Size Handheld Warranty Covered in Warranty Manufacturing Damage Warranty Summary 10 day's Replacement Warranty Service Type Replacement Not Covered in Warranty Physical Damage",
 'Key Features of JRB 1038 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1038 USB Fan System Requirements: OTG Enabled Android Smart Phone Material: Plastic,Specifications of JRB 1038 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1038 USB Fan (Multicolor) General Brand JRB Type USB Fan Model Name 1038 Smallest Mobile Powered By OTG Enabled Android Smart Phone Material Plastic System Requirements OTG Enabled Android Smart Phone Model ID Portable 1038 Color Multicolor In the Box Sales Package 1 MICRO USB FAN',
 'Key Features of JRB 1033 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1033 USB Fan System Requirements: OTG Enabled Android Smart Phone Material: Plastic,Specifications of JRB 1033 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1033 USB Fan (Multicolor) General Brand JRB Type USB Fan System Requirements OTG Enabled Android Smart Phone Material Plastic Model Name 1033 Smallest Mobile Powered By OTG Enabled Android Smart Phone Model ID Portable 1033 Color Multicolor In the Box Sales Package 1 MICRO USB FAN',
 'Key Features of JRB 1042 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1042 USB Fan System Requirements: OTG Enabled Android Smart Phone Material: Plastic,Specifications of JRB 1042 Smallest Mobile Powered By OTG Enabled Android Smart Phone Portable 1042 USB Fan (Multicolor) General Brand JRB Type USB Fan System Requirements OTG Enabled Android Smart Phone Material Plastic Model Name 1042 Smallest Mobile Powered By OTG Enabled Android Smart Phone Model ID Portable 1042 Color Multicolor In the Box Sales Package 1 MICRO USB FAN',
 'Key Features of Product The graphically rich 3D audio-video captures complete attention which leads to a good understanding of the subject Concepts that are not understood through text are easily understood through videos Well-designed complete learning modules for each topic Student centric learning management software helps learner to improve his learning outcomes with least effort Complete and comprehensive curriculum coverage,Eureka.in GSEB Class VII (Pen drive) Price: Rs. 2,999 Our premium product, Eureka.in for class VII Science (GSEB board) offers high quality 3d learning content, will surely help students to improve their performance in exams. It will also help them to lay a solid foundation for future learning challenges.,Specification of Product General Subjects Science, Mathematics Brand Eureka.in Num Of Disks 1 Classes 7 Sales Package High quality 3d animated videos, Interactive 3d simulations, Script, Interactive learning quiz, Web links, Many other attractive features. Type Visual Media Model Name GSEB Class VII Education Board Gujarat Secondary Education Board Subscription Validity 1 Year Model Id GB-07 Features 296 High quality 3d animated videos, 145 Interactive & 3d simulations, Script, Interactive learning quiz, Web links, Many other attractive features System Requirements Memory 2 GB RAM, 32GB space in Harddisk Operating System Windows 7 32/64,, Windows 8 32/64, Windows 8.1 32/72 Other Requirements Graphics Card:Nvidia Geforce 9400 or compatible, Sound Card & Speakers: Stereo sound card and speakers, LAN: 1 GB per second Hard Drive 32 GB Processor Core 2 Duo processor 1.86 GHz or above',
 'Filink 310 Black Toner (Black)\r\n                         Price: Rs. 1,390\r\n\t\t\t\t\r\n\t\t\tCompatible with the following printers: Canon LBP 3460. We guarantee consistent & high quality printing, high page yields, dark prints. The toner is brand new and ISO 9001 quality certified.\r\nCompatible with the following printers: Canon LBP 3460. We guarantee consistent & high quality printing, high page yields, dark prints. The toner is brand new and ISO 9001 quality certified.',
 "Key Features of FRENEMY MPAD271 Mousepad smooth surface finish move quickly glossy rich finish easy to clean,FRENEMY MPAD271 Mousepad (Multicolor) Price: Rs. 169 FRENEMY not only sale best quality mousepad but also make a long lasting relationship with buyer by quality products. FRENEMY Mousepad is smooth surface finish, making mouse move quickly across the entire mousepad. Zero hindrance, broad area with glossy rich finish. Brilliant imprint permanently print into pad. It won't crack or peel & Stain-resistant, high-density fabric construction is easy-to-clean.,Specifications of FRENEMY MPAD271 Mousepad (Multicolor) General Specifications Brand FRENEMY Material Non-slip Rubber Base, High Quality non slip fabric Platform PC Model Id MPAD271 Features Duratble & flexible for portibility Color Multicolor Dimensions Weight 150 g Length 179 mm Height 3 mm Width 218 mm In the Box Mousepad, Warranty Card",
 'Key Features of Shopizone BMW Wheel 32 GB  Pen Drive Case Material: Rubber Interface: USB 2.0 Operating System: Windows XP, Vista,Shopizone BMW Wheel 32 GB  Pen Drive (Black) Price: Rs. 999 This is a 32 GB pen drive. Cool and flaunty. These are basically pen drive who are disguised as superheroes. Adorable and cute. This works with all the operating systems and is handy and strong. Add bling and style to your pen drive with shopizone’s funky & innovative collection of 32GB flash drives for all age groups!,Specifications of Shopizone BMW Wheel 32 GB  Pen Drive (Black) General Specifications Interface USB 2.0 Brand Shopizone Capacity (GB) 32 GB Model BMW Wheel Case Material Rubber OS Supported Windows XP, Vista USB on the go No Color Black Dimensions Weight 100 g In the Box 1 Pendrive',
 'Specifications of I Ball IBCTDC 160/2gb/DDR2 with Dual Core 2 RAM 160 Hard Disk (Free DOS) Performance Features Processor Speed 2.33 GHz Processor Name Intel Processor Type Dual Core Number of Cores 2 General Brand I Ball Operating System Free DOS Graphics Memory NA GB Model Name IBCTDC 160/2gb/DDR2 Graphics Intel onboard System Memory and Storage Features Memory Technology DDR2 RAM 2 GB Hard Disk Capacity 160 GB Warranty Covered in Warranty All Parts of the Product Service Type 1 Year Domestic Warranty Warranty Summary 1 Year Domestic warranty covered for all parts of the product Not Covered in Warranty Burning or Physically Damaged Dimensions Weight 5 kg Height 50 cm Width 40 cm Depth 30 cm In the Box Sales Package CPU, Driver CD, Power Cable']
In [32]:
# Plots Countplot and Wordcloud without word preprocessing

# groupby(sort=False) yields the categories in order of first appearance,
# filters the dataframe once instead of once per category and leaves out
# rows whose category is missing

tokens_by_category = \
    flipkart_fork_1.groupby('product_category_node_1', sort=False)\
    ['description_tokenized_no_punct']

for category, tokenized_descriptions in tokens_by_category:

    # Flattens the per-description token lists into one frequency table

    category_frequency = \
        nltk.FreqDist(token
                      for tokens in tokenized_descriptions
                      for token in tokens)

    # An empty table cannot be drawn (WordCloud raises on zero words),
    # so the category is reported and skipped

    if not category_frequency:
        print('{}: no tokens to plot'.format(category))
        continue

    # Plots countplot

    fig, ax = plt.subplots(figsize=(20, 10))

    # FreqDist.plot draws on the current axes (the ones created above)
    # and returns them; show=False leaves the rendering to plt.show()

    ax = category_frequency.plot(50,
                                 cumulative=False,
                                 show=False,
                                 color=sns.color_palette('PuOr')[5])

    ax.tick_params(labelsize=tick_size - 10)
    ax.set_xlabel('Samples', fontsize=text_size)
    ax.set_ylabel('Count', fontsize=text_size)
    ax.set_title('{} Top 50 Word Frequency Plot'.format(category),
                 fontsize=title_size - 10,
                 pad=20)

    plt.show()
    plt.close(fig)

    # Plots Wordcloud

    wcloud = \
        WordCloud(colormap=sns.color_palette('PuOr', as_cmap=True),
                  background_color='black',
                  contour_color='black',
                  width=600,
                  height=300,
                  max_words=50,
                  random_state=random_seed)\
        .generate_from_frequencies(category_frequency)

    fig, ax = plt.subplots(1, 1, figsize=(20, 10))

    ax.imshow(wcloud, interpolation='bilinear')
    ax.axis("off")

    plt.show()
    plt.close(fig)

    print('----------------------------------------------------')
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------

Removing Stop Words

In [33]:
# Starts from spaCy's default stop words

stop_words = list(nlp.Defaults.stop_words)

# Among the 35 most common words of the whole corpus,
# keeps those that are not already default stop words

most_frequent_words = \
    [word for word, count in corpora_frequency.most_common(35)
     if word not in stop_words]


# Displays the length of each list built above

len(stop_words), len(most_frequent_words)

# Appends the corpus-specific words to the default stop words

stop_words.extend(most_frequent_words)

# Displays the length of the extended stop word list

len(stop_words)
Out[33]:
(326, 18)
Out[33]:
344
In [34]:
# Creates description_tokenized_no_punct_no_stop_words:
# each description is passed to the tokenizer together
# with the stop word list as second positional argument

flipkart_fork_1['description_tokenized_no_punct_no_stop_words'] = \
    flipkart_fork_1['description']\
        .apply(tokenize_doc_no_punct_no_stop_words, args=(stop_words,))
In [35]:
# Plots Countplot and Wordcloud with a basic word preprocessing

# groupby(sort=False) yields the categories in order of first appearance,
# filters the dataframe once instead of once per category and leaves out
# rows whose category is missing

tokens_by_category = \
    flipkart_fork_1.groupby('product_category_node_1', sort=False)\
    ['description_tokenized_no_punct_no_stop_words']

for category, tokenized_descriptions in tokens_by_category:

    # Flattens the per-description token lists into one frequency table

    category_frequency = \
        nltk.FreqDist(token
                      for tokens in tokenized_descriptions
                      for token in tokens)

    # An empty table cannot be drawn (WordCloud raises on zero words),
    # so the category is reported and skipped

    if not category_frequency:
        print('{}: no tokens to plot'.format(category))
        continue

    # Plots countplot

    fig, ax = plt.subplots(figsize=(20, 10))

    # FreqDist.plot draws on the current axes (the ones created above)
    # and returns them; show=False leaves the rendering to plt.show()

    ax = category_frequency.plot(50,
                                 cumulative=False,
                                 show=False,
                                 color=sns.color_palette('PuOr')[5])

    ax.tick_params(labelsize=tick_size - 10)
    ax.set_xlabel('Samples', fontsize=text_size)
    ax.set_ylabel('Count', fontsize=text_size)
    ax.set_title('{} Top 50 Word Frequency Plot'.format(category),
                 fontsize=title_size - 10,
                 pad=20)

    plt.show()
    plt.close(fig)

    # Plots Wordcloud

    wcloud = \
        WordCloud(colormap=sns.color_palette('PuOr', as_cmap=True),
                  background_color='black',
                  contour_color='black',
                  width=600,
                  height=300,
                  max_words=50,
                  random_state=random_seed)\
        .generate_from_frequencies(category_frequency)

    fig, ax = plt.subplots(1, 1, figsize=(20, 10))

    ax.imshow(wcloud, interpolation='bilinear')
    ax.axis("off")

    plt.show()
    plt.close(fig)

    print('----------------------------------------------------')
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------
----------------------------------------------------

Computer Vision EDA

In [36]:
# Convenience feature: image file name prefixed with the images folder path

flipkart_fork_1['image_path'] = \
    [path + image_name for image_name in flipkart_fork_1['image']]
In [37]:
# Displays up to 10 images for each category on a 2 x 5 grid

for category in flipkart_fork_1['product_category_node_1'].unique():

    # Bold header; '\033[0m' resets the style so that
    # whatever is printed afterwards is not bold as well

    print('\033[1m' + str(category) + ":" + '\033[0m')

    category_image_paths = \
        flipkart_fork_1.loc[
            flipkart_fork_1['product_category_node_1'] == category,
            'image_path'].iloc[:10]

    fig, axes = \
        plt.subplots(figsize=(20, 8), ncols=5, nrows=2)

    axes = axes.flatten()

    for panel, image_path in zip(axes, category_image_paths):

        image = cv2.imread(image_path)

        # cv2.imread returns None instead of raising when the file is
        # missing or unreadable; cvtColor would then fail with an
        # unhelpful assertion error, so the panel is blanked instead

        if image is None:
            print('Could not read image: {}'.format(image_path))
            panel.axis('off')
            continue

        # OpenCV loads images as BGR whereas matplotlib expects RGB

        panel.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    # Blanks the panels left over when a category has fewer than 10 images

    for panel in axes[len(category_image_paths):]:
        panel.axis('off')

    plt.show()
    plt.close(fig)
Home Furnishing:
Baby Care:
Watches:
Home Decor & Festive Needs:
Kitchen & Dining:
Beauty and Personal Care:
Computers:

Computer Vision: Equalization, SIFT and ORB Example

In [38]:
# Reads the image of the row labelled 6
# (presumably a watch product, as the titles below suggest - to confirm)

watch_image_path = flipkart_fork_1.loc[6, 'image_path']
watch_image = cv2.imread(watch_image_path)

# cv2.imread returns None instead of raising when the file cannot be read;
# failing here gives a clearer message than the later cvtColor assertion

if watch_image is None:
    raise FileNotFoundError(
        'Could not read image: {}'.format(watch_image_path))

# Converts it from OpenCV's BGR channel order to RGB for matplotlib

watch_image_rgb = cv2.cvtColor(watch_image, cv2.COLOR_BGR2RGB)

# Displays it

fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(watch_image_rgb)
ax.set_title('Watch image Original', fontsize=title_size - 20, pad=30)
plt.show();
In [39]:
# Converts the BGR image into a single-channel gray scale image

watch_image_gray = cv2.cvtColor(watch_image, cv2.COLOR_BGR2GRAY)

# Displays it with a gray colormap

fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(watch_image_gray, cmap='gray')
ax.set_title('Watch image Gray Scale', fontsize=title_size - 20, pad=30)
plt.show();
In [40]:
# Equalizes the histogram of the gray image

watch_image_gray_equalized = cv2.equalizeHist(watch_image_gray)

# Displays the equalized image with a gray colormap

fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(watch_image_gray_equalized, cmap='gray')
ax.set_title('Watch image Equalized', fontsize=title_size - 20, pad=30)
plt.show();
In [41]:
# Instantiates the SIFT and ORB extractors,
# each one limited to 500 retained features

sift = cv2.SIFT_create(nfeatures=500)
orb = cv2.ORB_create(nfeatures=500)
In [42]:
# Runs each algorithm on the equalized gray image and retrieves
# its keypoints and descriptors; no mask is applied

sift_keypoints, sift_descriptors = \
    sift.detectAndCompute(watch_image_gray_equalized, mask=None)

orb_keypoints, orb_descriptors = \
    orb.detectAndCompute(watch_image_gray_equalized, mask=None)
In [43]:
# Displays SIFT keypoints onto the rgb image

# The third argument is drawKeypoints' output image: None lets OpenCV
# allocate the result itself rather than receiving an unrelated image.
# DRAW_RICH_KEYPOINTS draws each keypoint with its size and orientation.

sift_image_with_keypoint = \
    cv2.drawKeypoints(watch_image_rgb,
                      sift_keypoints,
                      None,
                      color=(0, 255, 0),
                      flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(sift_image_with_keypoint)
ax.set_title('SIFT 500 first keypoints on a watch',
             fontsize=title_size - 20,
             pad=30)
plt.show();
In [44]:
# Displays ORB keypoints onto the rgb image

# The third argument is drawKeypoints' output image: None lets OpenCV
# allocate the result itself rather than receiving an unrelated image.
# DRAW_RICH_KEYPOINTS draws each keypoint with its size and orientation.

orb_image_with_keypoint = \
    cv2.drawKeypoints(watch_image_rgb,
                      orb_keypoints,
                      None,
                      color=(0, 255, 0),
                      flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)

fig, ax = plt.subplots(figsize=(7, 7))
ax.imshow(orb_image_with_keypoint)
ax.set_title('ORB 500 first keypoints on a watch',
             fontsize=title_size - 20,
             pad=30)
plt.show();

Export

In [49]:
# Exports the flipkart_fork_1

#flipkart_fork_1\
#    .to_csv(r'F:\Data\Projet 6\Cleaned\flipkart_cleaned', index=False)